##——— call libraries –

library("grid")
library("ggplot2")
library("gplots")
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library("viridis")
## Loading required package: viridisLite
library("reshape2")
library("gridExtra")
library("RColorBrewer")
library("scales")
## 
## Attaching package: 'scales'
## The following object is masked from 'package:viridis':
## 
##     viridis_pal
library("stringr")


source('D:/Pembro-Fluvac/Analysis/PembroFluSpec_Ranalysis_files/PembroFluSpec_PlottingFunctions.R')

———– merge & qc data from spreadsheets ——————–

setwd("D:/Pembro-Fluvac/Analysis")
mergedData <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for T cell flow cytometry")
## [1] "Data by year and cohort for T cell flow cytometry"
table(mergedData$Cohort, mergedData$TimeCategory, mergedData$Year)
## , ,  = 1
## 
##          
##           baseline late oneWeek
##   aPD1           3    1       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2
## 
##          
##           baseline late oneWeek
##   aPD1           3    2       3
##   Healthy       12   11      12
##   nonPD1         1    1       1
## 
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   11      16
##   Healthy       15    3      15
##   nonPD1         1    0       1
fit <- lm(data=mergedData[ , - which( colnames(mergedData) == "Label" | colnames(mergedData) == "Subject" | colnames(mergedData) == "Cohort" | 
                                        colnames(mergedData) == "TimeCategory" | colnames(mergedData) == "TimePoint")], 
          formula = TflowBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(mergedData) [ grep( paste(a, collapse="|"), colnames(mergedData)) ] <- paste0( colnames(mergedData)[ grep( paste(a, collapse="|"), colnames(mergedData)) ], ".batchEffect" ) }

serology <- read.csv(file= "D:/Pembro-Fluvac/Analysis/mergedData/SerologyMeasurements.csv", stringsAsFactors = F, header = T)
print("Data by year and cohort for serology")
## [1] "Data by year and cohort for serology"
table(serology$Cohort, serology$TimeCategory, serology$Year)
## , ,  = 1
## 
##          
##           baseline late oneWeek
##   aPD1           4    4       3
##   Healthy        0    0       0
##   nonPD1         0    0       0
## 
## , ,  = 2
## 
##          
##           baseline late oneWeek
##   aPD1          12    8       3
##   Healthy       12   12      12
##   nonPD1         3    3       1
## 
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15   15      15
##   nonPD1         1    1       1
temp1 <- merge(x = mergedData, y=serology, all = T, suffixes = c(".Tflow",".Serology"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


Bflow <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_freqParent_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
print("Data by year and cohort for B cell flow cytometry")
## [1] "Data by year and cohort for B cell flow cytometry"
table(Bflow$Cohort, Bflow$TimeCategory, Bflow$Year)
## , ,  = 3
## 
##          
##           baseline late oneWeek
##   aPD1          16   13      16
##   Healthy       15    7      14
##   nonPD1         1    0       1
BflowBATCH <- Bflow[, -c( grep( colnames(Bflow), pattern=paste0(c("Naive","Lymphocytes", "CD4","CD19hi_..FreqParent","CD19hi_CD11c","CD19hi_CD16","CD19hi_CD23","CD1hi9_CD25",
                                                             "CD19hi_CD32","CD19hi_CD38lo...FreqParent","CD19hi_CD80","CD19hi_CD86","CD19hi_IgM","CD19hi_CD138",
                                                             "CD19hi_CXCR4","CD19hi_CD71","CD19hi_Ki67..CXCR4","CD19hi_Ki67..Ki67","CD38lo...FreqParent",
                                                             "CD19hi_Tbet"), collapse ="|")) )]    # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=BflowBATCH[ , - which( colnames(BflowBATCH) == "Label" | colnames(BflowBATCH) == "Subject" | colnames(BflowBATCH) == "Cohort" | 
                                        colnames(BflowBATCH) == "TimeCategory" | colnames(BflowBATCH) == "TimePoint")], 
          formula = BflowBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bflow) [ grep( paste(a, collapse="|"), colnames(Bflow)) ] <- paste0( colnames(Bflow)[ grep( paste(a, collapse="|"), colnames(Bflow)) ], ".batchEffect" )}
temp2 <- merge(x = temp1, y=Bflow, all = T, suffixes = c(".Tflow",".Bflow"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


Tmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Tflow_medianFI_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
Tmfi <- Tmfi[, -c(grep( colnames(Tmfi), pattern=paste0(c("Dead","medfi.CD20","medfi.CD4","Freq","ICOSlo"), collapse ="|")))]  # cut out columns to test for batch fx, else variables > observations
fit <- lm(data=Tmfi[ , - which( colnames(Tmfi) == "Label" | colnames(Tmfi) == "Subject" | colnames(Tmfi) == "Cohort" | 
                                   colnames(Tmfi) == "TimeCategory" | colnames(Tmfi) == "TimePoint")], 
          formula = TmfiBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Tmfi) [ grep( paste(a, collapse="|"), colnames(Tmfi)) ] <- paste0( colnames(Tmfi)[ grep( paste(a, collapse="|"), colnames(Tmfi)) ], ".batchEffect" )}

temp3 <- merge(x = temp2, y=Tmfi, all = T, suffixes = c(".one",".Tmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))

demog <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/clinicalMetadata.csv", stringsAsFactors = F, header=T); demog$TimePoint <- demog$Visit <- NULL
temp4 <- merge(x = temp3, y=demog, all=T, suffixes = c(".two",".demog"), by = c('Subject', 'TimeCategory', 'Year','Cohort'))


Bmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_Bmfi_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=Bmfi[ , - which( colnames(Bmfi) == "Label" | colnames(Bmfi) == "Subject" | colnames(Bmfi) == "Cohort" | 
                                    colnames(Bmfi) == "TimeCategory" | colnames(Bmfi) == "TimePoint")], 
          formula = BmfiBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(Bmfi) [ grep( paste(a, collapse="|"), colnames(Bmfi)) ] <- paste0( colnames(Bmfi)[ grep( paste(a, collapse="|"), colnames(Bmfi)) ], ".batchEffect" )}
temp5 <- merge(x = temp4, y=Bmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


TBpmfi <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/mergeData_Bflow_TmfiFromB_allyrs.csv", stringsAsFactors = F, header = T, row.names = 1)
fit <- lm(data=TBpmfi[ , - which( colnames(TBpmfi) == "Label" | colnames(TBpmfi) == "Subject" | colnames(TBpmfi) == "Cohort" | 
                                  colnames(TBpmfi) == "TimeCategory" | colnames(TBpmfi) == "TimePoint")], 
          formula = TfromBBatch  ~ .)
a <- as.data.frame(summary(fit)$coefficients)
a <- rownames(a[which(a$`Pr(>|t|)` < 0.05), ] )
if (length (a) > 0) { colnames(TBpmfi) [ grep( paste(a, collapse="|"), colnames(TBpmfi)) ] <- paste0( colnames(TBpmfi)[ grep( paste(a, collapse="|"), colnames(TBpmfi)) ], ".batchEffect" )}
temp6 <- merge(x = temp5, y=TBpmfi, all = T, suffixes = c(".one",".TBpmedfi"), by= c('Label', 'TimeCategory','Cohort','Subject','TimePoint','Year'))


mergedData <- temp6

# mergedData <- read.csv(file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv", stringsAsFactors = F, header = T, row.names = 1)

mergedData$TimeCategory <- factor(mergedData$TimeCategory, levels = c("baseline", "oneWeek","late"))
mergedData$Cohort <- factor(mergedData$Cohort, levels = c("Healthy", "aPD1","nonPD1"))
mergedData$dummy <- "dummy"

rm(Bflow); rm(temp1); rm(temp2); rm(temp3); rm(temp4); rm(temp5); rm(temp6)

# write.csv(mergedData, file = "D:/Pembro-Fluvac/Analysis/mergedData/allMergedData.csv")

dataAvail <- mergedData[ , - grep(paste(c("TimePoint", "dummy", "Visit","Label"), collapse = "|"), colnames(mergedData))]
temp <- reshape(dataAvail, idvar = c('Subject', 'Cohort','Year'), timevar = c('TimeCategory'),direction = 'wide')
saveCohort <- temp[which(temp$Cohort != "nonPD1"),"Cohort"]
saveYear <- temp[which(temp$Cohort != "nonPD1"),"Year"]
simplifiedDataAvail <- temp[which(temp$Cohort != "nonPD1"), c('Subject',  
                                                              'cTfh_ICOShiCD38hi_CCR6hi...FreqParent.baseline' ,          # Tflow_freq
                                                              'Plasma.CXCL13..pg.mL..baseline' ,                          # Serology
                                                              'IgDlo_CD71hi_ActBCells.Tbet....FreqParent.baseline' ,      # Bflow_freq
                                                              'CD19hi_CD27hiCD38hi_CD20lo.PB..._medfi.Foxp3..baseline' ,  # Tflow_medFI
                                                              'Sex.baseline' ,                                            # Demographics
                                                              'CD20loCD71hi...medfi.Blimp1..baseline')]                   # Bflow_medFI
                
rownames(simplifiedDataAvail) <- simplifiedDataAvail$Subject; simplifiedDataAvail$Subject <- NULL;
simplifiedDataAvail <- as.data.frame(!is.na(simplifiedDataAvail));  simplifiedDataAvail <- simplifiedDataAvail * 2 
simplifiedDataAvail <- cbind(simplifiedDataAvail, Cohort = (as.numeric(saveCohort)-1)*2, Year = as.numeric(saveYear)-1)
names(simplifiedDataAvail) <- c("Tflow_freq","Serology","Bflow_freq", "Tflow_medFI","Demographics","Bflow_medFI","Cohort", "Year")
pheatmap(as.data.frame(t(simplifiedDataAvail)), cluster_col = F,color=gray.colors(100, start=1,end=0), fontsize_row = 18, fontsize_col = 6, main = "Data available by subject"
         # , filename = "D:/Pembro-Fluvac/Analysis/Images/dataAvailable.pdf"
         ); dev.off()

## null device 
##           1

———– Cohort description ——————–

table(mergedData$Cohort, mergedData$Sex, mergedData$Year)  
## , ,  = 1
## 
##          
##           Female Male
##   Healthy      0    0
##   aPD1         1    3
##   nonPD1       0    0
## 
## , ,  = 2
## 
##          
##           Female Male
##   Healthy      6    6
##   aPD1         6    6
##   nonPD1       0    0
## 
## , ,  = 3
## 
##          
##           Female Male
##   Healthy      6    9
##   aPD1         4   12
##   nonPD1       0    0
#pdf(file = "D:/Pembro-Fluvac/Analysis/Images/Cycle_of_immunotherapy.pdf")

hist(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], breaks = 50, col = "grey90",main = "Cycle of IT")   

# dev.off()
median(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Cycle.of.Immunotherapy'], na.rm = T)  
## [1] 6.5
paste("Median Age aPD1: ", median(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Median Age aPD1:  61.5"
paste("Median Age Healthy: ", median(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Median Age Healthy:  33"
paste("Range Age aPD1: ", range(mergedData[which(mergedData$Cohort == "aPD1" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)  )
## [1] "Range Age aPD1:  30" "Range Age aPD1:  81"
paste("Range Age Healthy: ", range(mergedData[which(mergedData$Cohort == "Healthy" & mergedData$TimeCategory == "baseline"), 'Age'], na.rm = T)   )
## [1] "Range Age Healthy:  23" "Range Age Healthy:  47"

———– Global overview of phenotypic differences ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_NaiveB...FreqParent"))
prePostTimeAveraged(melted, title = "Naive B frequencies averaged", xLabel = NULL, yLabel = "NaiveB (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_NaiveB...FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -41.041  -7.960   0.489  12.084  34.620 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           38.180      3.687  10.354 2.64e-16 ***
## CohortaPD1            14.065      3.900   3.607 0.000545 ***
## CohortnonPD1          -9.451     12.624  -0.749 0.456334    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek    4.930      4.369   1.128 0.262604    
## TimeCategorylate      -4.824      4.984  -0.968 0.336061    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 17.33 on 78 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.1829, Adjusted R-squared:  0.141 
## F-statistic: 4.365 on 4 and 78 DF,  p-value: 0.003083
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Ki67....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Ki67+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Ki67+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_Ki67....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.6583 -0.9559 -0.3589  0.5265  8.7111 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           1.4238     0.3259   4.369  3.8e-05 ***
## CohortaPD1            0.2651     0.3446   0.769    0.444    
## CohortnonPD1         -0.1634     1.1157  -0.146    0.884    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  -0.1708     0.3861  -0.442    0.659    
## TimeCategorylate      0.3994     0.4404   0.907    0.367    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.532 on 78 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.0328, Adjusted R-squared:  -0.0168 
## F-statistic: 0.6614 on 4 and 78 DF,  p-value: 0.6207
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD71....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD71+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD71+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD71....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0318 -1.1309 -0.4282  0.8963  9.0763 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          2.46818    0.41401   5.962 6.86e-08 ***
## CohortaPD1          -0.44450    0.43785  -1.015   0.3132    
## CohortnonPD1        -0.06632    1.41735  -0.047   0.9628    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  0.25628    0.49053   0.522   0.6028    
## TimeCategorylate     1.01110    0.55956   1.807   0.0746 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.946 on 78 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.04905,    Adjusted R-squared:  0.0002855 
## F-statistic: 1.006 on 4 and 78 DF,  p-value: 0.4097
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD80....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD80+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD80+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD80....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -2.7011 -0.9985 -0.3846  0.7051  3.9340 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.33110    0.33397   9.974 1.41e-15 ***
## CohortaPD1          -0.76105    0.35319  -2.155   0.0343 *  
## CohortnonPD1        -1.22854    1.14331  -1.075   0.2859    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek  0.17487    0.39569   0.442   0.6598    
## TimeCategorylate     0.01458    0.45137   0.032   0.9743    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.57 on 78 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.06555,    Adjusted R-squared:  0.01763 
## F-statistic: 1.368 on 4 and 78 DF,  p-value: 0.2528
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD86....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD86+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD86+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = CD19hi_CD86....FreqParent ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -6.127 -3.121 -1.199  1.198 38.723 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           2.5771     1.2860   2.004   0.0485 *
## CohortaPD1            3.1425     1.3600   2.311   0.0235 *
## CohortnonPD1         -1.2056     4.4026  -0.274   0.7849  
## Year                      NA         NA      NA       NA  
## TimeCategoryoneWeek   1.0570     1.5237   0.694   0.4899  
## TimeCategorylate      0.1353     1.7381   0.078   0.9381  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 6.045 on 78 degrees of freedom
##   (89 observations deleted due to missingness)
## Multiple R-squared:  0.07476,    Adjusted R-squared:  0.02731 
## F-statistic: 1.576 on 4 and 78 DF,  p-value: 0.1891
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_IgM....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+IgM+ frequencies averaged", xLabel = NULL, yLabel = "CD19+IgM+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_CD138....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+CD138+ frequencies averaged", xLabel = NULL, yLabel = "CD19+CD138+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_Tbet....FreqParent"))
prePostTimeAveraged(melted, title = "CD19+Tbet+ frequencies averaged", xLabel = NULL, yLabel = "CD19+Tbet+ (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..ICOShi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+ICOS+ frequencies averaged", xLabel = NULL, yLabel = "CD4+ICOS+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..ICOShi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -12.0105  -1.6260  -0.0323   1.2164   9.1895 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.09149    1.23720   2.499   0.0138 *  
## CohortaPD1           6.90950    0.54832  12.601   <2e-16 ***
## CohortnonPD1         2.05412    1.41132   1.455   0.1480    
## Year                 1.13692    0.45151   2.518   0.0131 *  
## TimeCategoryoneWeek -0.26431    0.60293  -0.438   0.6619    
## TimeCategorylate     0.09752    0.71099   0.137   0.8911    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.045 on 125 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.5851, Adjusted R-squared:  0.5685 
## F-statistic: 35.26 on 5 and 125 DF,  p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" & Year == 3 )  # given strong influence of Year in the linear model 
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CTLA4+ frequencies averaged", xLabel = NULL, yLabel = "CD4+CTLA4+ (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..CTLA4hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.5064 -2.3519 -0.4784  1.7747 12.6697 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -7.8721     1.5939  -4.939 2.46e-06 ***
## CohortaPD1            4.6061     0.7064   6.520 1.56e-09 ***
## CohortnonPD1          0.1421     1.8182   0.078    0.938    
## Year                  5.4675     0.5817   9.399 3.38e-16 ***
## TimeCategoryoneWeek   1.1420     0.7768   1.470    0.144    
## TimeCategorylate      0.7163     0.9160   0.782    0.436    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.922 on 125 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.5482, Adjusted R-squared:  0.5301 
## F-statistic: 30.33 on 5 and 125 DF,  p-value: < 2.2e-16
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..CD38hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+CD38hi frequencies averaged", xLabel = NULL, yLabel = "CD4+CD38hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..CD38hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -9.1317 -1.9852 -0.7808  1.4314 10.4204 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           8.9334     1.4565   6.133 1.04e-08 ***
## CohortaPD1           -3.8999     0.6455  -6.042 1.62e-08 ***
## CohortnonPD1         -4.7783     1.6615  -2.876  0.00474 ** 
## Year                  0.4487     0.5315   0.844  0.40018    
## TimeCategoryoneWeek  -0.4178     0.7098  -0.589  0.55715    
## TimeCategorylate     -0.7245     0.8370  -0.866  0.38840    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.584 on 125 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.2485, Adjusted R-squared:  0.2185 
## F-statistic: 8.269 on 5 and 125 DF,  p-value: 8.93e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Foxp3hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Foxp3hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Foxp3hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6337 -1.8716  0.0331  1.6646  6.8068 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)           5.7245     0.9393   6.094 1.26e-08 ***
## CohortaPD1            1.3958     0.4163   3.353  0.00106 ** 
## CohortnonPD1         -0.4404     1.0715  -0.411  0.68180    
## Year                 -0.4711     0.3428  -1.374  0.17187    
## TimeCategoryoneWeek  -0.5445     0.4578  -1.189  0.23651    
## TimeCategorylate      1.0108     0.5398   1.872  0.06348 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.312 on 125 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.1554, Adjusted R-squared:  0.1217 
## F-statistic: 4.601 on 5 and 125 DF,  p-value: 0.0006906
summary(lm( data = mergedData, Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent))
## 
## Call:
## lm(formula = Live_CD3..CD4..Foxp3hi...FreqParent ~ Cohort + Year + 
##     TimeCategory + Live_CD3..CD4..CTLA4hi...FreqParent + Live_CD3..CD4..ICOShi...FreqParent, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.3134 -1.4895  0.0339  1.5967  7.2888 
## 
## Coefficients:
##                                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)                          7.01145    1.00219   6.996 1.49e-10 ***
## CohortaPD1                          -0.14411    0.59563  -0.242 0.809226    
## CohortnonPD1                        -0.65404    1.00563  -0.650 0.516662    
## Year                                -1.66127    0.41666  -3.987 0.000114 ***
## TimeCategoryoneWeek                 -0.74781    0.43072  -1.736 0.085032 .  
## TimeCategorylate                     0.85949    0.50343   1.707 0.090299 .  
## Live_CD3..CD4..CTLA4hi...FreqParent  0.19892    0.05069   3.924 0.000144 ***
## Live_CD3..CD4..ICOShi...FreqParent   0.09026    0.06530   1.382 0.169433    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 2.151 on 123 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.2808, Adjusted R-squared:  0.2398 
## F-statistic: 6.859 on 7 and 123 DF,  p-value: 6.995e-07
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Live_CD3..CD4..Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "CD4+Ki67hi frequencies averaged", xLabel = NULL, yLabel = "CD4+Ki67hi (freq CD4)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

summary(lm( data = mergedData, Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = Live_CD3..CD4..Ki67hi...FreqParent ~ Cohort + Year + 
##     TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -3.0387 -1.0913 -0.2433  0.7418  9.0418 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          3.952449   0.668557   5.912 3.01e-08 ***
## CohortaPD1           0.280504   0.296302   0.947   0.3456    
## CohortnonPD1        -0.690810   0.762648  -0.906   0.3668    
## Year                -0.632018   0.243987  -2.590   0.0107 *  
## TimeCategoryoneWeek  0.069804   0.325811   0.214   0.8307    
## TimeCategorylate     0.006414   0.384207   0.017   0.9867    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.645 on 125 degrees of freedom
##   (41 observations deleted due to missingness)
## Multiple R-squared:  0.06121,    Adjusted R-squared:  0.02366 
## F-statistic:  1.63 on 5 and 125 DF,  p-value: 0.1568

———– phenotypic differences ——————–

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_..FreqParent <- subsetData$cTfh_ICOShiCD38hi_..FreqParent * subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "+/+ cTfh frequencies averaged", xLabel = NULL, yLabel = "+/+ cTfh (freq CD4)") 

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CTLA4hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CTLA4hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CTLA4hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CTLA4hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CTLA4hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.35023 -0.10025 -0.02990  0.06669  0.91298 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)   
## (Intercept)          0.147508   0.073348   2.011  0.04654 * 
## CohortaPD1           0.108438   0.032160   3.372  0.00100 **
## Year                -0.003298   0.026809  -0.123  0.90229   
## TimeCategoryoneWeek  0.100877   0.036072   2.797  0.00601 **
## TimeCategorylate    -0.005669   0.042440  -0.134  0.89396   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1785 on 121 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.1478, Adjusted R-squared:  0.1196 
## F-statistic: 5.247 on 4 and 121 DF,  p-value: 0.0006239
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & Year == 3)
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_Ki67hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ Ki67hi frequencies averaged", xLabel = NULL, yLabel = "+/+ Ki67hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + Year + TimeCategory))  # mergedData because of influence of Year
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_Ki67hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.17838 -0.07199 -0.02214  0.03502  0.79451 
## 
## Coefficients: (1 not defined because of singularities)
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)         0.111944   0.028914   3.872 0.000236 ***
## CohortaPD1          0.015299   0.031512   0.486 0.628796    
## Year                      NA         NA      NA       NA    
## TimeCategoryoneWeek 0.057102   0.033808   1.689 0.095549 .  
## TimeCategorylate    0.005275   0.043694   0.121 0.904237    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1331 on 72 degrees of freedom
##   (14 observations deleted due to missingness)
## Multiple R-squared:  0.04414,    Adjusted R-squared:  0.004312 
## F-statistic: 1.108 on 3 and 72 DF,  p-value: 0.3515
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_Ki67hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_Ki67hi...FreqParent", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ Ki67hi (freq CD4) ")  

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_cTfh..._medfi.Ki67.", fillParam="Cohort", title="Ki67 in ICOS+CD38+ cTfh at oW", 
             yLabel="+/+ for medianFI of Ki67")  
## Warning: Removed 18 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CXCR3hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CXCR3hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CXCR3hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CXCR3hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CXCR3hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.32735 -0.09347 -0.02113  0.04789  1.09346 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.191102   0.075829   2.520 0.013032 *  
## CohortaPD1           0.069720   0.033248   2.097 0.038078 *  
## Year                -0.040554   0.027715  -1.463 0.145999    
## TimeCategoryoneWeek  0.147631   0.037292   3.959 0.000128 ***
## TimeCategorylate     0.001777   0.043876   0.041 0.967752    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1846 on 121 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.1667, Adjusted R-squared:  0.1392 
## F-statistic: 6.051 on 4 and 121 DF,  p-value: 0.0001792
# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent <- subsetData$cTfh_ICOShiCD38hi_CD25hi...FreqParent * subsetData$cTfh_ICOShiCD38hi_..FreqParent * 
  subsetData$cTfh_..FreqParent * subsetData$Live_CD3..CD4..Nonnaive...FreqParent /1000000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_CD25hi...FreqParent"))
prePostTimeAveraged(melted, title = "+/+ CD25hi frequencies averaged", xLabel = NULL, yLabel = "+/+ CD25hi (freq CD4)") 

summary(lm( data = subsetData, cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + Year + TimeCategory))
## 
## Call:
## lm(formula = cTfh_ICOShiCD38hi_CD25hi...FreqParent ~ Cohort + 
##     Year + TimeCategory, data = subsetData)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.047427 -0.025753 -0.007976  0.009342  0.194646 
## 
## Coefficients:
##                      Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          0.056581   0.016507   3.428 0.000832 ***
## CohortaPD1          -0.003367   0.007238  -0.465 0.642641    
## Year                -0.011194   0.006033  -1.855 0.065973 .  
## TimeCategoryoneWeek  0.010656   0.008118   1.313 0.191769    
## TimeCategorylate     0.007734   0.009551   0.810 0.419682    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04018 on 121 degrees of freedom
##   (34 observations deleted due to missingness)
## Multiple R-squared:  0.0457, Adjusted R-squared:  0.01415 
## F-statistic: 1.449 on 4 and 121 DF,  p-value: 0.2222
subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_NaiveB...FreqParent"))
prePostTimeAveraged(melted, title = "Naive B frequencies averaged", xLabel = NULL, yLabel = "NaiveB (freq CD19)") # + scale_y_continuous(breaks=seq(0,99,0.25))

———– PB and Tfh just baseline frequencies ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: cTfh +/+ at d0", yLabel="ICOS+CD38+ cTfh frequency at d0")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB at d0", yLabel="CD19+IgD-CD27++CD38++ frequency at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).
## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d0", yLabel="ABC (freq of CD19+IgD-CD71+) at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).

## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD19hi_NaiveB...FreqParent", fillParam="Cohort", title="All yrs: Naive B at d0", yLabel="CD19+IgDhi frequency at d0")
## Warning: Removed 28 rows containing non-finite values (stat_boxplot).

## Warning: Removed 28 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="Live_CD3..CD4..ICOShi...FreqParent", fillParam="Cohort", title="All yrs: CD4+ICOS+ at d0", yLabel="CD4+ICOS+ frequency at d0")
## Warning: Removed 10 rows containing non-finite values (stat_boxplot).
## Warning: Removed 10 rows containing missing values (geom_point).

———– PB and Tfh just day 7 frequencies ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="Yr3: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") + scale_y_continuous(breaks=seq(1:12), limits=c(0.5,11))

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="Yr3: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") + 
  coord_cartesian(ylim = c(0.5,11)) + scale_y_continuous(breaks=seq(1:12))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & Year == "3")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD19_CD27.CD38....FreqParent", fillParam="Cohort", title="Yr3: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7") + scale_y_continuous(breaks=seq(0,99,1))

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 0)
twoSampleBox(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh frequency at d7") + scale_y_continuous(breaks=seq(0,99,1))

twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: ICOS+CD38+ cTfh at d7", yLabel="ICOS+CD38+ cTfh (freq CXCR5+)") + 
  coord_cartesian(ylim = c(0.5,11)) + scale_y_continuous(breaks=seq(1:12))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_justDay7_AllYrs.pdf", device='pdf', width=7, height=7)

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 100
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="All yrs: PB (freq CD19) at d7", yLabel="CD19+CD27++CD38++ frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

## Warning: Removed 19 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_CD20loCD71hi...FreqParent", fillParam="Cohort", title="All yrs: Plasmablast at d7", yLabel="IgDloCD71+CD20lo frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

## Warning: Removed 19 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

## Warning: Removed 19 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
twoSampleBox(data=subsetData, xData="Cohort", yData="IgDlo_CD71hi_ActBCells...FreqParent", fillParam="Cohort", title="All yrs: ABC at d7", yLabel="ABC frequency at d7")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).

## Warning: Removed 19 rows containing missing values (geom_point).

———– Tfh and PB Fold-change at day 7 ——————–

subsetData <- subset(mergedData, TimeCategory != "late" & Year == "3" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Yr3: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") + scale_y_continuous(breaks=seq(0,99,1))

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & cTfh_ICOShiCD38hi_..FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

twoSampleBar(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: cTfh response at one week", yLabel="ICOS+CD38+ cTfh fold-change") + 
  coord_cartesian(ylim = c(0,7)) + scale_y_continuous(breaks=seq(1:12))
## Warning: Removed 1 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfhResponse_foldchange_AllYrs.pdf", device="pdf", width=7, height=7)

aggregate( FC_Tfhresponse, by= list( FC_Tfhresponse$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##   Group.1 Subject Cohort baseline  oneWeek       FC
## 1 Healthy      NA     NA 3.894815 4.627778 1.236258
## 2    aPD1      NA     NA 3.887727 6.236190 1.884346
subsetData <- subset(mergedData, TimeCategory != "late"  & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: PB response at one week", yLabel="CD19+CD27++CD38++ (freq. IgD-) fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 29 rows containing non-finite values (stat_boxplot).
## Warning: Removed 29 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Year == "3" & Cohort != "nonPD1")
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="Yr3: PB response (freq CD19) at one week", yLabel="CD19+CD27++CD38++ fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: PB response at one week", yLabel="IgDloCD71+CD20lo fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0)
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: PB response (freq CD19) at one week", yLabel="IgDloCD71+CD20lo fold-change") + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: ABC response FC at one week", yLabel="ABC fold-change")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0)
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_Tfhresponse$FC <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
twoSampleBox(data=FC_Tfhresponse, xData="Cohort", yData="FC", fillParam="Cohort", title="AllYrs: ABC response (freq CD19) FC at one week", yLabel="ABC fold-change")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).

## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1")
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (freqParent) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 16 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 16 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  )
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("CD27hiCD38hi_..FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "CD19+CD27++CD38++ PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20lo PB fold-change")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_CD20loCD71hi...FreqParent > 0 )
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent")); 
FC_Tfhresponse$PB <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "PB", fillParam = "Cohort", title = "FC PB (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20lo PB fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1"  & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_Tfhresponse$ABC <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,4))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).
## Warning: Removed 26 rows containing missing values (geom_smooth).

# ************** different denominator *********************
subsetData <- subset(mergedData, TimeCategory != "late" & Cohort != "nonPD1" & IgDlo_CD71hi_ActBCells...FreqParent > 0 )
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent
FC_Tfhresponse <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")); 
FC_Tfhresponse$FC_Tfh <- FC_Tfhresponse$`oneWeek`/FC_Tfhresponse$`baseline`
FC_Tfhresponse2 <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("IgDlo_CD71hi_ActBCells...FreqParent")); 
FC_Tfhresponse$ABC <- FC_Tfhresponse2$`oneWeek`/FC_Tfhresponse2$`baseline`
bivScatter(data1 = FC_Tfhresponse[which(FC_Tfhresponse$Cohort == "Healthy"),], data2 = FC_Tfhresponse[ which(FC_Tfhresponse$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FC_Tfh", yData = "ABC", fillParam = "Cohort", title = "FC ABC (as freq CD19) vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "IgDloCD71+CD20hi ABC fold-change") + scale_y_continuous(limits=c(0,12))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (geom_point).

———– PB vs cTfh response correlations ——————–

## ***************    CD27++CD38++     ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1")    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "cTfh vs PB response at oneWeek", xLabel= "CD27++CD38++ (freq IgD-)", yLabel = "ICOS+CD38+ cTfh frequency") + scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "CD27hiCD38hi_..FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD27++CD38++ at oneWeek", xLabel= "CD27++CD38++ (freq IgD-)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~  subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~  subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit)))       # one row identified with high Cook's distance
bivScatter(data1 = subsetData1[ -as.numeric(names(outlier)), ], data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "CD27hiCD38hi_..FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs PB response at oneWeek", yLabel= "27+38+ Plasmablast (freq CD19+IgD-)", xLabel = "ICOS+CD38+ cTfh frequency") + coord_cartesian(ylim = c(0,12))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD71.pdf", device="pdf")
# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$CD27hiCD38hi_..FreqParent <- oneWeek$CD27hiCD38hi_..FreqParent *oneWeek$CD19hi_NotNaiveB_freqParent / 100
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  & cTfh_ICOShiCD38hi_..FreqParent > 1)    #  *****   OUTLIER REMOVED
bivScatter(data1 =  subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "CD27hiCD38hi_..FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs CD27+CD38+ at oneWeek", yLabel= "CD27+CD38+ (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency") + coord_cartesian(xlim = c(0,12),ylim = c(0,4))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

fit <- lm(subsetData1$CD27hiCD38hi_..FreqParent ~  subsetData1$cTfh_ICOShiCD38hi_..FreqParent)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit)))       # one row identified with high Cook's distance
#  by review of Cook's distance, row 26 in subsetData1 has the highest Cook's distance and will thus exclude
bivScatter(data1 =  subsetData1[ -as.numeric(names(outlier)), ], data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "CD27hiCD38hi_..FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "+/+ cTfh vs CD27+CD38+ at oneWeek", yLabel= "CD27+CD38+ (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency") + coord_cartesian(xlim = c(0,11), ylim = c(0,4))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD19.pdf", device="pdf", width=9, height=7)
univScatter(data = oneWeek, yData = "CD27hiCD38hi_..FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD27+CD38+  at oneWeek", yLabel= "CD27+CD38+ (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency") + coord_cartesian(xlim = c(0,11), ylim=c(0,4))
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-27-38-d7_freqCD19_univ.pdf", device="pdf")



## ***************    CD71+ CD20loPB    ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "cTfh vs CD20lo response at oneWeek", yLabel= "CD71+CD20lo frequency", xLabel = "ICOS+CD38+ cTfh frequency") 
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ASC at oneWeek", xLabel= "CD20lo (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  oneWeek$IgDlo_CD71hi_CD20loCD71hi...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )    
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs CD20lo response at oneWeek", yLabel= "PB CD71+CD20lo (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(data = oneWeek, yData = "IgDlo_CD71hi_CD20loCD71hi...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ASC at oneWeek", yLabel= "PB CD71+CD20lo (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency") + coord_cartesian(xlim = c(0,12), ylim=c(0,3))
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-71hi-20lo-d7_freqCD19_univ.pdf", device="pdf")


## ***************    CD71+ CD20hi ABC    ********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy"  )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" ) 
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = " cTfh vs ABC at oneWeek", xLabel= "ABC CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency") + 
  scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1), limits = c(0,15))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(data = oneWeek, yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs ABC at oneWeek", yLabel= "ABC CD20hi (freq CD71+)", xLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
subsetData1 <- subset(oneWeek, Cohort == "Healthy"   )   
subsetData2 <- subset(oneWeek, Cohort == "aPD1"  )      
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs ABC at oneWeek", yLabel= "ABC CD20hi (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency") 
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

univScatter(data = oneWeek, yData = "IgDlo_CD71hi_ActBCells...FreqParent", xData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "+/+ cTfh vs CD20hi at oneWeek", yLabel= "ABC CD20hi (freq CD19)", xLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 21 rows containing non-finite values (stat_smooth).
## Warning: Removed 21 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_ABC-71hi-20hi-d7_freqCD19_univ.pdf", device="pdf")




oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
subsetData1 <- subset(oneWeek, Cohort == "Healthy" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & cTfh_ICOShiCD38hi_..FreqParent > 1)    #  *****   OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "AllYrs: cTfh vs PB response at oneWeek", xLabel= "Plasmablast frequency", yLabel = "ICOS+CD38+ cTfh frequency") + scale_x_continuous(breaks=seq(0,99,1)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

# ************** different denominator *********************
oneWeek <- subset(mergedData, TimeCategory == "oneWeek")
oneWeek$CD27hiCD38hi_..FreqParent <-  oneWeek$CD27hiCD38hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent /100
subsetData1 <- subset(oneWeek, Cohort == "Healthy" & Year == "3" )    
subsetData2 <- subset(oneWeek, Cohort == "aPD1" & Year == "3" & cTfh_ICOShiCD38hi_..FreqParent > 1)    #  *****   OUTLIER REMOVED
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD27hiCD38hi_..FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "cTfh vs PB response at oneWeek", xLabel= "Plasmablast frequency (freq CD19)", yLabel = "ICOS+CD38+ cTfh frequency") #+ scale_x_continuous(breaks=seq(0,99,5)) + scale_y_continuous(breaks=seq(0,99,1))
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs ActivB response at oneWeek", xLabel= "CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_ActBCells...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs CD20hi response at oneWeek", xLabel= "CD20hi (freq CD71+)", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

oneWeek$CD27hiCD38hi_..FreqParent <-  oneWeek$CD27hiCD38hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent /100
oneWeek$IgDlo_CD71hi_ActBCells...FreqParent <-  oneWeek$IgDlo_CD71hi_ActBCells...FreqParent * oneWeek$IgDlo_CD71hi_..FreqParent * oneWeek$CD19hi_NotNaiveB_freqParent / 10000
fit <- lm(oneWeek$CD27hiCD38hi_..FreqParent ~  oneWeek$cTfh_ICOShiCD38hi_..FreqParent)
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit)))       # one row identified with high Cook's distance
cTfhCorrel <- function( data, subset )
{
  z <- vector()
  z[1] <- subset
  z[2] <- round(cor(data[ which(data$Cohort == "Healthy"), subset], data[ which(data$Cohort == "Healthy") ,"cTfh_ICOShiCD38hi_..FreqParent"], method = "pearson", use = "complete.obs"), 2)
  z[3] <- round(cor(data[ which(data$Cohort == "aPD1"), subset], data[ which(data$Cohort == "aPD1"),"cTfh_ICOShiCD38hi_..FreqParent"], method = "pearson", use = "complete.obs"), 2)
  return(z)
}
result <- data.frame(CellType = 0, Healthy = 0, aPD1 = 0)
result[1,] <- cTfhCorrel(oneWeek, subset = "IgDlo_CD71hi_ActBCells...FreqParent")
result[2,] <- cTfhCorrel(oneWeek[ -as.numeric(names(outlier)), ], subset = "CD27hiCD38hi_..FreqParent")
result$CellType <- str_replace(result$CellType, "IgDlo_CD71hi_ActBCells...FreqParent", "Activ B cells")
result$CellType <- str_replace(result$CellType, "CD27hiCD38hi_..FreqParent", "CD27++CD38++ ASC")
result <- melt(result, id.vars = "CellType", measure.vars = c("Healthy","aPD1")); result$value <- as.numeric(result$value)
result$CellType <- factor(result$CellType, levels = c("CD27++CD38++ ASC",  "Activ B cells"))
ggplot(result, aes(x=CellType, fill=variable, color="bl")) + geom_bar(aes(y = value), stat='identity', position="dodge" ) + theme_bw()  + 
  scale_fill_manual(values=c("#7FAEDB", "#FFB18C")) + scale_color_manual(values="black") + ggtitle("Pearson correl with\n ICOS+CD38+ cTfh at oneWeek") + ylab("Pearson r") + 
  theme(axis.text.x = element_text(size=12,hjust = 1, angle = 45), axis.title = element_text(size=12,hjust = 0.5), plot.title = element_text(size=14,hjust = 0.5), 
        axis.title.x = element_blank(), axis.text.y = element_text(size=12), legend.position = "none")+ scale_y_continuous(breaks = seq(-1,1,0.2), limits=c(-1,1))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_various-d7_pearsons.pdf", device = "pdf", width=3, height=5)

———– Averaged frequencies over time ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "Cohort", title = "cTfh response over time", xLabel = "TimeCategory",
                 yLabel = "ICOS+CD38+ cTfh frequency", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5))     # limits = c(0,45)
## Warning: Removed 31 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 31 rows containing missing values (geom_path).
## Warning: Removed 31 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "CD19_CD27.CD38....FreqParent", fillParam = "Cohort", title = "PB response over time", xLabel = "TimeCategory",
                 yLabel = "Plasmablast frequency", groupby = "Subject")  + scale_y_continuous(breaks=seq(0,150,5))     # limits = c(0,45)
## Warning: Removed 31 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 31 rows containing missing values (geom_path).

## Warning: Removed 31 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("cTfh_ICOShiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "cTfh responses averaged", xLabel = NULL, yLabel = "Average ICOS+CD38+ cTfh frequency") + scale_y_continuous(breaks=seq(0,99,0.5))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD19hi_..FreqParent"))
prePostTimeAveraged(melted, title = "CD19+ frequencies averaged", xLabel = NULL, yLabel = "CD19+ (freq of CD14-CD4- live)") #+ scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast frequency") + scale_y_continuous(breaks=seq(0,99,0.25))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$CD27hiCD38hi_..FreqParent <-  subsetData$CD27hiCD38hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent /100
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("CD27hiCD38hi_..FreqParent"))
prePostTimeAveraged(melted, title = "Plasmablast responses averaged", xLabel = NULL, yLabel = "CD27++CD38++ Plasmablast frequency") + scale_y_continuous(breaks=seq(0,99,0.25))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20lo responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20lo frequency") + scale_y_continuous(breaks=seq(0,99,3))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent <-  subsetData$IgDlo_CD71hi_CD20loCD71hi...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_CD20loCD71hi...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20lo responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20lo frequency") #+ scale_y_continuous(breaks=seq(0,99,3))

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20+ ABC responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20+ frequency") + scale_y_continuous(breaks=seq(0,99,3))

# ************** different denominator *********************
subsetData <- subset(mergedData, Cohort != "nonPD1" )
subsetData$IgDlo_CD71hi_ActBCells...FreqParent <-  subsetData$IgDlo_CD71hi_ActBCells...FreqParent * subsetData$IgDlo_CD71hi_..FreqParent * subsetData$CD19hi_NotNaiveB_freqParent / 10000
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("IgDlo_CD71hi_ActBCells...FreqParent"))
prePostTimeAveraged(melted, title = "IgDloCD71+CD20+ ABC responses averaged", xLabel = NULL, yLabel = "Average IgDloCD71+CD20+ frequency") #+ scale_y_continuous(breaks=seq(0,99,3))

———– Vaccine response determinants and biomarkers ——————–

# correlate HAI vs cycle of IT
subsetData <- subset(mergedData,  cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
univScatter(data = subsetData, xData = "HAI.titer..H1N1pdm09.", yData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "AllYrs: Cycle of IT vs HAI titer", xLabel= "HAI titer H1N1pdm09", yLabel = "Cycle of immunotherapy")
## Warning: Removed 105 rows containing non-finite values (stat_smooth).
## Warning: Removed 105 rows containing missing values (geom_point).

univScatter(data = subsetData, yData = "cTfh_ICOShiCD38hi_..FreqParent", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "AllYrs: Cycle of IT vs +/+ cTfh at bL", yLabel= "ICOS+CD38+ cTfh freq", xLabel = "Cycle of immunotherapy") + coord_cartesian(ylim= c(0,8))
## Warning: Removed 105 rows containing non-finite values (stat_smooth).

## Warning: Removed 105 rows containing missing values (geom_point).

summary(fit <- lm(Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, subsetData))
## 
## Call:
## lm(formula = Cycle.of.Immunotherapy ~ cTfh_ICOShiCD38hi_..FreqParent, 
##     data = subsetData)
## 
## Residuals:
##    Min     1Q Median     3Q    Max 
## -8.706 -4.822 -1.891  2.732 18.037 
## 
## Coefficients:
##                                Estimate Std. Error t value Pr(>|t|)  
## (Intercept)                      19.877      6.991   2.843   0.0117 *
## cTfh_ICOShiCD38hi_..FreqParent   -2.550      1.786  -1.428   0.1726  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 7.531 on 16 degrees of freedom
##   (105 observations deleted due to missingness)
## Multiple R-squared:  0.113,  Adjusted R-squared:  0.05758 
## F-statistic: 2.039 on 1 and 16 DF,  p-value: 0.1726
FC_response <- dcast( subset(mergedData, TimeCategory != "late" & cTfh_ICOShiCD38hi_..FreqParent > 0), `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")) 
FC_response$FCtfh <- FC_response$`oneWeek`/FC_response$`baseline`; FC_response$Cohort <- NULL
subsetData <- merge(x=FC_response, y=subsetData, by = "Subject")
univScatter(data = subsetData, yData = "FCtfh", xData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "AllYrs: Cycle of IT vs +/+ cTfh FC", yLabel= "ICOS+CD38+ cTfh FoldChange", xLabel = "Cycle of immunotherapy")  
## Warning: Removed 105 rows containing non-finite values (stat_smooth).

## Warning: Removed 105 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1")    
FC_response <- dcast( subset(mergedData, TimeCategory != "oneWeek"), `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09.")) 
FC_response$FChai <- FC_response$`late`/FC_response$`baseline`; FC_response$Cohort <- NULL
subsetData <- merge(x=FC_response, y=subsetData, by = "Subject")
univScatter(data = subsetData, xData = "FChai", yData = "Cycle.of.Immunotherapy", fillParam = "dummy", 
            title = "Cycle of IT vs HAI titer FC", xLabel= "HAI titer H1N1pdm09 FoldChange", yLabel = "Cycle of immunotherapy") + coord_cartesian(xlim = c(0,10))
## Warning: Removed 139 rows containing non-finite values (stat_smooth).
## Warning: Removed 139 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CycleIT_vs_HAItiterFC.pdf", device='pdf', width=6, height=6)


subsetData <- subset(mergedData, TimeCategory != "oneWeek" & Cohort != "nonPD1" )
FC_response <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09."))
FC_response$FChai <- FC_response$`late`/FC_response$`baseline`
FC_response2 <- dcast( subset(mergedData, TimeCategory != "late" & cTfh_ICOShiCD38hi_..FreqParent > 0), `Subject`+`Cohort`~`TimeCategory`, value.var = c("cTfh_ICOShiCD38hi_..FreqParent")) 
FC_response2$FCtfh <- FC_response2$`oneWeek`/FC_response2$`baseline`; FC_response2$Cohort <- NULL
FC_response <- merge(x=FC_response, y=FC_response2, by = "Subject")
bivScatter(data1 = FC_response[which(FC_response$Cohort == "Healthy"),], data2 = FC_response[ which(FC_response$Cohort == "aPD1") ,], 
           name1 = "Healthy", name2 = "aPD1", xData = "FCtfh", yData = "FChai", fillParam = "Cohort", title = "FC HAI vs FC Tfh", xLabel = "ICOS+CD38+ cTfh fold-change", 
           yLabel = "H1N1pdm09 titer fold-change") + scale_y_continuous(limits=c(1,128), trans = "log2")
## Warning: Removed 2 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).
## Warning: Removed 24 rows containing missing values (geom_smooth).
## Warning: Removed 4 rows containing missing values (geom_smooth).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("HAI.titer..H1N1pdm09.")); melted <- melted[ which( !is.na(melted$value) ),]
overTime <- aggregate( melted$value, by= list(melted$variable, melted$TimeCategory, melted$Cohort), FUN=mean, na.rm = T); overTime
##                 Group.1  Group.2 Group.3        x
## 1 HAI.titer..H1N1pdm09. baseline Healthy 289.4815
## 2 HAI.titer..H1N1pdm09.  oneWeek Healthy 343.1111
## 3 HAI.titer..H1N1pdm09.     late Healthy 328.5926
## 4 HAI.titer..H1N1pdm09. baseline    aPD1 122.5000
## 5 HAI.titer..H1N1pdm09.  oneWeek    aPD1 254.4762
## 6 HAI.titer..H1N1pdm09.     late    aPD1 405.7600
aPD1_seroprot <- length(which(melted$value > 40 & melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
aPD1_total <- length(which(melted$Cohort == "aPD1" & melted$TimeCategory == "late") )
HC_seroprot <- length(which(melted$value > 40 & melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
HC_total <- length(which(melted$Cohort == "Healthy" & melted$TimeCategory == "late") )
continTable <- matrix(c(aPD1_seroprot, aPD1_total - aPD1_seroprot, HC_seroprot, HC_total - HC_seroprot), ncol=2); continTable
##      [,1] [,2]
## [1,]   24   23
## [2,]    1    4
fisher.test(continTable)
## 
##  Fisher's Exact Test for Count Data
## 
## data:  continTable
## p-value = 0.3517
## alternative hypothesis: true odds ratio is not equal to 1
## 95 percent confidence interval:
##    0.3666691 213.6992862
## sample estimates:
## odds ratio 
##   4.072668
seroprot <- data.frame( Cohort = c("Healthy", "aPD1"), Seroprot = c(HC_seroprot / HC_total, aPD1_seroprot / aPD1_total))
seroprot$Cohort <- factor(seroprot$Cohort, levels = c("Healthy", "aPD1"))
ggplot(data=seroprot, aes(x=Cohort, y=Seroprot, fill=Cohort)) + scale_fill_manual(values = c("#7FAEDB", "#FFB18C")) +  
  geom_bar( position = position_dodge(), stat = "identity") + ggtitle("Seroprotection") + ylab("Proportion seroprotected") +  theme_bw() +
  theme(axis.text = element_text(size=28,hjust = 0.5), axis.title = element_text(size=28,hjust = 0.5), axis.title.x = element_blank(), plot.title = element_text(size=32,hjust = 0.5)) + 
  theme(legend.position = "none") + coord_cartesian(ylim = c(0,1)) + scale_y_continuous(breaks = seq(0,1,0.1))

# ggsave (filename = "D:/Pembro-Fluvac/Analysis/Images/Seroprotection_byCohort.pdf", device="pdf", width=6, height = 6)

prePostTimeAveraged(melted, title = "HAI responses", xLabel = NULL, yLabel = "H1N1pdm09 HAI titer") + scale_y_continuous(trans = 'log2', breaks=c(2^(2:14)))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAIresponsesTimeAveraged.pdf", device = "pdf", width=7, height=7)
summary( fit <- aov(value ~ Cohort + TimeCategory, data=melted ) )
##               Df   Sum Sq Mean Sq F value Pr(>F)  
## Cohort         1   203563  203563   2.176 0.1422  
## TimeCategory   2   747464  373732   3.995 0.0203 *
## Residuals    155 14499191   93543                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
TukeyHSD(fit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = value ~ Cohort + TimeCategory, data = melted)
## 
## $Cohort
##                   diff      lwr      upr     p adj
## aPD1-Healthy -71.57455 -167.419 24.26991 0.1421936
## 
## $TimeCategory
##                       diff       lwr      upr     p adj
## oneWeek-baseline  97.91185 -42.77313 238.5968 0.2292008
## late-baseline    162.36780  24.69877 300.0368 0.0162151
## late-oneWeek      64.45595 -80.41462 209.3265 0.5447789
subsetData <- subset(melted, TimeCategory == "late")
# subsetData$value <- log2(subsetData$value)
# wilcox.test(subsetData$value ~ subsetData$Cohort)
twoSampleBarMelted(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="Late H1N1pdm09 HAI titer", yLabel="H1N1pdm09 HAI titer") + 
  scale_y_continuous(trans = "log2",breaks=c(2^(2:14))) + coord_cartesian(ylim = c(4,4096))

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/HAItiter_late_byCohort.pdf", device="pdf", width=6, height=6)






subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBar(data=subsetData, xData="Cohort", yData="cTfh_ICOShiCD38hi_..FreqParent", fillParam="Cohort", title="ICOS+CD38+ cTfh at baseline", yLabel="ICOS+CD38+ cTfh (freq CXCR5+)") +
  coord_cartesian(ylim = c(0,12))
## Warning: Removed 10 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh_baseline_freq_byCohort.pdf", device="pdf", width=7, height=7)

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_..FreqParent", fillParam="Cohort", title="PB response at Late", yLabel="Plasmablast freq - Late")
## Warning: Removed 32 rows containing non-finite values (stat_boxplot).
## Warning: Removed 32 rows containing missing values (geom_point).

———– Scatterplots of HAI titer vs CXCL13 ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("Plasma.CXCL13..pg.mL."))
melted <- melted[-which(melted$Subject == "FS1718-120"),]  # remove FS1718-120 because outlier
prePostTimeAveraged(melted, title = "CXCL13 responses", xLabel = NULL, yLabel = "Plasma CXCL13 (pg/mL)") + scale_y_continuous(breaks=seq(0,99,2))

summary( fit <- aov(value ~ Cohort*TimeCategory, data=melted ) )
##                      Df Sum Sq Mean Sq F value Pr(>F)
## Cohort                1   2169    2169   1.085  0.299
## TimeCategory          2   8505    4252   2.128  0.123
## Cohort:TimeCategory   2   5654    2827   1.415  0.246
## Residuals           148 295733    1998               
## 3 observations deleted due to missingness
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_TimeAveraged.pdf", device="pdf", width=7, height=7)

subsetData <- subset(melted, TimeCategory == "oneWeek")
rownames(subsetData) <- seq(1:nrow(subsetData))
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

fit <- lm (value ~ Cohort, subsetData)  
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) ;  subsetData <- subsetData[ - as.numeric( names(outlier)), ]        # one row identified with high Cook's distance
subsetData <- subsetData[which(!is.na( subsetData$value)), ]
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)")

twoSampleBar(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at oneWeek", yLabel="plasma CXCL13 (pg/mL)") 

aggregate( subsetData, by= list(subsetData$variable, subsetData$TimeCategory, subsetData$Cohort), FUN=mean, na.rm = T)
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA

## Warning in mean.default(X[[i]], ...): argument is not numeric or logical:
## returning NA
##                 Group.1 Group.2 Group.3 Subject TimeCategory Cohort variable
## 1 Plasma.CXCL13..pg.mL. oneWeek Healthy      NA           NA     NA       NA
## 2 Plasma.CXCL13..pg.mL. oneWeek    aPD1      NA           NA     NA       NA
##      value
## 1 59.03994
## 2 80.16653
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_oW.pdf", device="pdf", height=7, width = 7)

subsetData <- subset(melted, TimeCategory == "baseline")
rownames(subsetData) <- seq(1:nrow(subsetData))
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at baseline", yLabel="plasma CXCL13 (pg/mL)")

fit <- lm (value ~ Cohort, subsetData)  
outlier <- which(cooks.distance(fit) == max(cooks.distance(fit))) ;  subsetData <- subsetData[ - as.numeric( names(outlier)), ]        # one row identified with high Cook's distance
subsetData <- subsetData[which(!is.na( subsetData$value)), ]
twoSampleBox(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at baseline", yLabel="plasma CXCL13 (pg/mL)")

twoSampleBar(data=subsetData, xData="Cohort", yData="value", fillParam="Cohort", title="CXCL13 at baseline", yLabel="plasma CXCL13 (pg/mL)") 

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/CXCL13_bL.pdf", device="pdf", height=7, width = 7)

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory != "late" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "Plasma.CXCL13..pg.mL.", fillParam = "Cohort", title = "CXCL13 levels", xLabel = "TimeCategory",
            yLabel = "plasma CXCL13 (pg/mL)", groupby = "Subject"); a + scale_y_continuous(trans='log2')
## Warning: Removed 2 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 2 rows containing missing values (geom_point).

## NULL
subsetData <- subset(mergedData, Cohort == "aPD1" & TimeCategory == "oneWeek" )
univScatter(subsetData, xData = "HAI.titer..H1N1pdm09.", yData="Plasma.CXCL13..pg.mL.", fillParam = "Cohort", 
                 title = "All yrs aPD1: CXCL13 vs HAI at one week", xLabel= "HAI titer - H1N1pdm09", yLabel = "Plasma CXCL13 (pg/mL)")
## Warning: Removed 1 rows containing non-finite values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
crossTimeCategory <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c('Plasma.CXCL13..pg.mL.' , 'HAI.titer..H1N1pdm09.') )
crossTimeCategory2 <- dcast(crossTimeCategory, Subject + Cohort ~ TimeCategory + variable)
univScatter(crossTimeCategory2, xData = "baseline_Plasma.CXCL13..pg.mL.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL, 
            title = "All yrs: Late HAI vs baseline CXCL13", xLabel= "baseline Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09")   # nonstd appearance due to lack of fillParam
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

univScatter(crossTimeCategory2, xData = "oneWeek_Plasma.CXCL13..pg.mL.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL, 
            title = "All yrs: Late HAI vs d7 CXCL13", xLabel= "oneWeek Plasma CXCL13 (pg/mL)", yLabel = "Late HAI titer - H1N1pdm09")   # nonstd appearance due to lack of fillParam
## Warning: Removed 15 rows containing non-finite values (stat_smooth).
## Warning: Removed 15 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
crossTimeCategory <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c('cTfh_ICOShiCD38hi_..FreqParent' , 'HAI.titer..H1N1pdm09.') )
crossTimeCategory2 <- dcast(crossTimeCategory, Subject + Cohort ~ TimeCategory + variable)
univScatter(crossTimeCategory2, xData = "baseline_HAI.titer..H1N1pdm09.", yData="late_HAI.titer..H1N1pdm09.", fillParam = NULL, 
            title = "All yrs: Late HAI vs baseline HAI", xLabel= "baseline HAI titer - H1N1pdm09", yLabel = "Late HAI titer - H1N1pdm09")   # nonstd appearance due to lack of fillParam
## Warning: Removed 7 rows containing non-finite values (stat_smooth).
## Warning: Removed 7 rows containing missing values (geom_point).

crossTimeCategory2$FCtfh <- crossTimeCategory2$oneWeek_cTfh_ICOShiCD38hi_..FreqParent / crossTimeCategory2$baseline_cTfh_ICOShiCD38hi_..FreqParent


 
subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(data = subsetData, xData = "TimeCategory", yData = "HAI.titer..H1N1pdm09.", fillParam = "Cohort", title = "HAI titers over time", xLabel = "TimeCategory",
            yLabel = "HAI titer", groupby = "Subject"); a + scale_y_continuous(trans='log2')
## Warning: Removed 1 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 1 rows containing missing values (geom_path).
## Warning: Removed 1 rows containing missing values (geom_point).

## NULL
subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d0", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) )

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d7", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2', breaks=c(2^(2:14) ) )
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="HAI.titer..H1N1pdm09.", fillParam="Cohort", title="All yrs: HAI titers at d21-28", yLabel="H1N1pdm09 titer") + scale_y_continuous(trans='log2' , breaks=c(2^(2:14) ) )

subsetData <- subset(mergedData, Cohort != "nonPD1" )
seroconv <- dcast(subsetData, `Subject`+`Cohort`~`TimeCategory`, value.var = c("HAI.titer..H1N1pdm09.")); 
seroconv$FC <- seroconv$`late`/seroconv$`baseline`
twoSampleBox(data=seroconv, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: seroconversion", yLabel="Seroconversion factor") + scale_y_continuous(breaks=seq(0,99,4))
## Warning: Removed 7 rows containing non-finite values (stat_boxplot).
## Warning: Removed 7 rows containing missing values (geom_point).

twoSampleBar(data=seroconv, xData="Cohort", yData="FC", fillParam="Cohort", title="All yrs: seroconversion", yLabel="Seroconversion factor") + 
  scale_y_continuous(trans='log2',breaks=c(2^(1:14)))
## Warning: Removed 7 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/SeroconversionFactor.pdf", device = "pdf", width=6, height=6)





subsetData <- subset(mergedData, Cohort != "nonPD1" )
prePostTime(subsetData, xData = "TimeCategory", yData = "Plasma.CXCL13..pg.mL.", fillParam = "Cohort", title = "CXCL13 over itme", xLabel = "TimeCategory",
            yLabel = "HAI titer", groupby = "Subject") + scale_y_continuous(breaks=seq(0,150,5))     # limits = c(0,45)
## Warning: Removed 3 rows containing non-finite values (stat_summary).
## No summary function supplied, defaulting to `mean_se()
## No summary function supplied, defaulting to `mean_se()
## Warning: Removed 2 rows containing missing values (geom_path).
## Warning: Removed 3 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d0", yLabel="Plasma CXCL13 (pg/mL)")

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d7", yLabel="Plasma CXCL13 (pg/mL)")
## Warning: Removed 2 rows containing non-finite values (stat_boxplot).
## Warning: Removed 2 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "late")
twoSampleBox(data=subsetData, xData="Cohort", yData="Plasma.CXCL13..pg.mL.", fillParam="Cohort", title="All yrs: Plasma CXCL13 at d21-28", yLabel="Plasma CXCL13 (pg/mL)")
## Warning: Removed 1 rows containing non-finite values (stat_boxplot).
## Warning: Removed 1 rows containing missing values (geom_point).

summary(lm( Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, data=mergedData))
## 
## Call:
## lm(formula = Plasma.CXCL13..pg.mL. ~ Cohort + Year + TimeCategory, 
##     data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -69.752 -18.188  -7.785  14.485 107.456 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)    
## (Intercept)          -90.142      9.710  -9.284  < 2e-16 ***
## CohortaPD1            13.542      4.406   3.073  0.00249 ** 
## CohortnonPD1           9.294      9.243   1.005  0.31619    
## Year                  58.079      3.472  16.728  < 2e-16 ***
## TimeCategoryoneWeek    5.963      5.252   1.135  0.25788    
## TimeCategorylate      -4.627      5.065  -0.913  0.36235    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 27.4 on 161 degrees of freedom
##   (5 observations deleted due to missingness)
## Multiple R-squared:  0.6466, Adjusted R-squared:  0.6356 
## F-statistic: 58.91 on 5 and 161 DF,  p-value: < 2.2e-16

———– Working with the medianFI for key transcription factors ——————–

subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
twoSampleBox(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Blimp1.", fillParam="Cohort", title="All yrs: Blimp1 in +/+ at d7", yLabel="Blimp1 median FI")
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).
## Warning: Removed 21 rows containing missing values (geom_point).

twoSampleBox(data=subsetData, xData="Cohort", yData="Ki67hiCD38hicTfh_medfi.Bcl6..batchEffect", fillParam="Cohort", title="All yrs: Bcl6 in +/+ at d7", yLabel="Bcl6 median FI")
## Warning: Removed 21 rows containing non-finite values (stat_boxplot).

## Warning: Removed 21 rows containing missing values (geom_point).

———– subset protein correlations ——————–

# subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek")
# a <- colnames(subsetData[ , sapply(subsetData, class) == 'character'])
# a <- cor(subsetData[ , - grep(paste(c(a, "Cohort"), collapse="|"), colnames(subsetData))], use="pairwise.complete.obs")
# a[order(a[,20],decreasing = F),20]


oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD32hi...FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs PB CD32hi at oneWeek", xLabel= "CD20lo - CD32hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs_PB-71hi-20lo-d7_CD32_univ.pdf")

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "ActBCells...medfi.CD32.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs ABC CD32 at oneWeek", xLabel= "ABC - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "ActBCells...medfi.CD32.", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs ABC CD32 at oneWeek", xLabel= "ABC - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData="cTfh_ICOShiCD38hi_..FreqParent", 
                fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD86 resp at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDlo_CD71hi_CD20loCD71hi.CD86....FreqParent", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs CD20lo CD86+ at oneWeek", xLabel= "CD20lo - CD86hi frequency", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs PB CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "CD19_NonnaiveB.CD27.CD38....medfi.CD32.", yData = "cTfh_ICOShiCD38hi_..FreqParent", fillParam = "TimeCategory", 
            title = "AllYrs: cTfh vs CD27++CD38++ CD32 at oneWeek", xLabel= "CD27++CD38++ PB - CD32 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/cTfh-d7_vs-PB-27hi38hi-d7_CD32medFI_univ.pdf")


# as predicted by the elastic net regression
oneWeek <- subset(mergedData, TimeCategory == "oneWeek" & cTfh_ICOShiCD38hi_..FreqParent > 1 & Cohort != "nonPD1")    #  *****   OUTLIER REMOVED
subsetData1 <- subset(oneWeek, Cohort == "Healthy")    ; subsetData2 <- subset(oneWeek, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 12 rows containing non-finite values (stat_smooth).
## Warning: Removed 5 rows containing non-finite values (stat_smooth).
## Warning: Removed 12 rows containing missing values (geom_point).
## Warning: Removed 5 rows containing missing values (geom_point).

univScatter(data = oneWeek, xData = "IgDloCD71hi..medfi.CD86.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 17 rows containing non-finite values (stat_smooth).
## Warning: Removed 17 rows containing missing values (geom_point).

subsetData <- subset(mergedData, Cohort != "nonPD1" )
melted <- melt(subsetData, id.vars = c('Subject', 'TimeCategory', 'Cohort'), measure.vars = c("X1.Kd.HA")); melted$value <- 1/melted$value
prePostTimeAveraged(melted, title = "HA EC50/Kd by ELISA", xLabel = NULL, yLabel = "1 / Concentration (ug/mL)") + scale_y_continuous(breaks=seq(0,99,0.25))

summary(fit <- lm( data = mergedData, X1.Kd.HA ~ Cohort  + TimeCategory))
## 
## Call:
## lm(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -1.7869 -0.7384 -0.4336  0.1578  5.9044 
## 
## Coefficients:
##                     Estimate Std. Error t value Pr(>|t|)  
## (Intercept)           0.6462     0.3658   1.767   0.0821 .
## CohortaPD1            0.3859     0.3857   1.000   0.3209  
## TimeCategoryoneWeek   1.1021     0.4942   2.230   0.0293 *
## TimeCategorylate      1.0691     0.4351   2.457   0.0168 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 1.545 on 63 degrees of freedom
##   (105 observations deleted due to missingness)
## Multiple R-squared:  0.1159, Adjusted R-squared:  0.07385 
## F-statistic: 2.754 on 3 and 63 DF,  p-value: 0.04978
summary(fit <- aov(formula = X1.Kd.HA ~ Cohort  + TimeCategory, data = mergedData))
##              Df Sum Sq Mean Sq F value Pr(>F)  
## Cohort        1   0.96   0.958   0.402 0.5285  
## TimeCategory  2  18.75   9.377   3.931 0.0246 *
## Residuals    63 150.29   2.386                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 105 observations deleted due to missingness
TukeyHSD(fit)
##   Tukey multiple comparisons of means
##     95% family-wise confidence level
## 
## Fit: aov(formula = X1.Kd.HA ~ Cohort + TimeCategory, data = mergedData)
## 
## $Cohort
##                   diff        lwr       upr     p adj
## aPD1-Healthy 0.2391946 -0.5150433 0.9934325 0.5285435
## 
## $TimeCategory
##                          diff         lwr      upr     p adj
## oneWeek-baseline  1.064122846 -0.09774140 2.225987 0.0792318
## late-baseline     1.061795367  0.01849295 2.105098 0.0451923
## late-oneWeek     -0.002327479 -1.20924183 1.204587 0.9999882
# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_overTime.pdf", device="pdf", height=8, width=8)



subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "baseline"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBox(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="All yrs: HA EC50/Kd at baseline", yLabel="1 / Concentration (ug/mL)")
## Warning: Removed 31 rows containing non-finite values (stat_boxplot).
## Warning: Removed 31 rows containing missing values (geom_point).

twoSampleBar(data=subsetData, xData="Cohort", yData="X1.Kd.HA", fillParam="Cohort", title="All yrs: HA EC50/Kd at baseline", yLabel="1 / Concentration (ug/mL)")
## Warning: Removed 31 rows containing missing values (geom_point).

# ggsave(filename = "D:/Pembro-Fluvac/Analysis/Images/AntibodyAffinity_baseLine.pdf", device="pdf", height=7, width=7)


subsetData <- subset(mergedData, Cohort != "nonPD1" & TimeCategory == "oneWeek"); subsetData$X1.Kd.HA <- 1 / subsetData$X1.Kd.HA
twoSampleBox(data=subsetData, xData="Cohort", yData="CD20loCD71hi...medfi.Ki67.", fillParam="Cohort", title="All yrs: at oneWeek", yLabel="medFI Ki67")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).

univScatter(data=subsetData, xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
            fillParam = "TimeCategory", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 19 rows containing non-finite values (stat_smooth).

## Warning: Removed 19 rows containing missing values (geom_point).

subsetData1 <- subset(subsetData, Cohort == "Healthy")    ; subsetData2 <- subset(subsetData, Cohort == "aPD1")   
bivScatter(data1 = subsetData1, data2 = subsetData2, name1 = "HC", name2 = "aPD1", xData = "CD20loCD71hi...medfi.Ki67.", yData="cTfh_ICOShiCD38hi_..FreqParent", 
           fillParam = "Cohort", title = "AllYrs: cTfh vs CD86 resp at oneWeek", xLabel= "IgDloCD71hi - CD86 medFI", yLabel = "ICOS+CD38+ cTfh frequency")
## Warning: Removed 13 rows containing non-finite values (stat_smooth).
## Warning: Removed 6 rows containing non-finite values (stat_smooth).
## Warning: Removed 13 rows containing missing values (geom_point).
## Warning: Removed 6 rows containing missing values (geom_point).

twoSampleBox(data=subsetData, xData="Cohort", yData="CD27hiCD38hi_Ki67....FreqParent", fillParam="Cohort", title="All yrs: at oneWeek", yLabel="medFI Ki67")
## Warning: Removed 19 rows containing non-finite values (stat_boxplot).
## Warning: Removed 19 rows containing missing values (geom_point).